Purpose

This is a quick example on how to create a map in R using shapefiles provided by Statistics Canada. For the purpose of this example we have also collected population density data for map visualization.

Load Data

Load Population Data

df_pop <- read.csv('98-310-XWE2011002-1201.CSV')
colnames(df_pop)
## [1] "Geographic.name"                                                     
## [2] "Incompletely.enumerated.Indian.reserves.and.Indian.settlements..2011"
## [3] "Population..2011"                                                    
## [4] "Total.private.dwellings..2011"                                       
## [5] "Private.dwellings.occupied.by.usual.residents..2011"
head(df_pop)
##   Geographic.name
## 1          Canada
## 2             A0A
## 3             A0B
## 4             A0C
## 5             A0E
## 6             A0G
##   Incompletely.enumerated.Indian.reserves.and.Indian.settlements..2011
## 1                                                                    T
## 2                                                                     
## 3                                                                     
## 4                                                                     
## 5                                                                     
## 6                                                                     
##   Population..2011 Total.private.dwellings..2011
## 1         33476688                      14569633
## 2            46297                         23950
## 3            20985                         12585
## 4            12834                          8272
## 5            23384                         12733
## 6            36264                         21153
##   Private.dwellings.occupied.by.usual.residents..2011
## 1                                            13320614
## 2                                               18701
## 3                                                8854
## 4                                                5482
## 5                                                9659
## 6                                               14967
# only keep the Forward Sortation Area (FSA; first three characters of the Postal Code) and the pop count
df_pop <- subset(df_pop, select = c('Geographic.name', 'Population..2011'))
# Rename variables
colnames(df_pop) <- c('fsa', 'Population')

# drop the total for Canada as a whole, we are only interested in FSAs
df_pop <- subset(df_pop, fsa != 'Canada')
head(df_pop)
##   fsa Population
## 2 A0A      46297
## 3 A0B      20985
## 4 A0C      12834
## 5 A0E      23384
## 6 A0G      36264
## 7 A0H      18499

Load Canada FSA Shapefiles

can_sf <- readShapePoly('FSA/gfsa000b11a_e.shp')
## Warning: use rgdal::readOGR or sf::st_read
head(can_sf@data$CFSAUID)
## [1] R0G R0H R0J R0K R0L R0M
## 1621 Levels: A0A A0B A0C A0E A0G A0H A0J A0K A0L A0M A0N A0P A0R A1A ... Y1A

Prepare Base Map

# In Ubuntu, you may need to run the following command before installing packages rgeos, rgdal and gpclib, which are needed for the fortify command below:
# sudo apt-get install libgdal1-dev libproj-dev

can_df <- fortify(can_sf, region = 'CFSAUID')
rm(can_sf) # free up memory

# get map from Google Maps API
gmap <- qmap("Canada", zoom=3)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Canada&zoom=3&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Canada&sensor=false
## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead
# build polygon boundaries from shapefile data
fsa_boundaries <- geom_polygon(aes(x=long, y=lat, group=group), data=can_df, color='black', fill=NA)

# let's have a look. no population for now
gmap + fsa_boundaries

The previous map, while it covered all of Canada, may not be ideal for observing FSA-level differences. Let’s zoom in.

gmap <- qmap("Montreal, Canada", zoom=12)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Montreal,+Canada&zoom=12&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Montreal,%20Canada&sensor=false
## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead
gmap + fsa_boundaries

Merge Data

# add population counts to map data
can_df <- merge(can_df, df_pop, by.x='id', by.y='fsa', all.x=TRUE)

Population Map

fsa_boundaries_pop <- geom_polygon(aes(x=long, y=lat, group=group, fill=Population, alpha=0.2), data=can_df, color='black')
gmap + fsa_boundaries_pop + scale_fill_gradient(low='#ffeda0', high='#f03b20') + scale_alpha(guide='none')

This map is using a population scale that is difficult to understand, let’s redesign it.

quantile(df_pop$Population, probs=seq(0.0, 1, 0.1), na.rm=TRUE)
##       0%      10%      20%      30%      40%      50%      60%      70% 
##      0.0   3154.5   7149.0  10366.5  14037.0  17433.0  20971.0  25546.5 
##      80%      90%     100% 
##  31085.0  40383.0 135850.0

We see that 90% of Canadian FSAs have ~40k or below. Let’s create new categories that reflect this.

new_breaks <- seq(from=0, to=40000, by=10000)
new_labels <- comma(new_breaks)
new_labels[length(new_labels)] <- paste0(new_labels[length(new_labels)], '+') # append plus sign for the highest label
new_labels
## [1] "0"       "10,000"  "20,000"  "30,000"  "40,000+"
gmap + fsa_boundaries_pop +
  scale_fill_gradient(low='#ffeda0', high='#f03b20', limits=c(0, 40000), breaks=new_breaks, labels=new_labels) + 
  scale_alpha(guide='none')